# Alexander F. Gazmararian
# agazmararian@gmail.com
#
# This script generates Figure 1 (project distance map visualization).
# REQUIRES: Coordinate data (survey_visibility_processed_with_geo.rds)
# In anonymized replication packages, this script is skipped because
# respondent coordinates are removed to protect privacy.
# See README.md for information on accessing coordinate data.

library(here)
source(here("analysis", "visibility", "analysis_config.R"))
source(here("R", "load_functions.R"))
source(here("R", "visibility", "replication_mode.R"))

# Check replication mode - this script requires coordinates
REPLICATION_MODE <- init_replication_mode()

geo_file <- here("data", "inter", "survey_visibility_processed_with_geo.rds")
if (!file.exists(geo_file)) {
  message("=== SKIPPING visualize_project_distance.R ===")
  message("This script generates Figure 1 (project/respondent distance map)")
  message("REASON: Coordinate data not available (anonymized replication package)")
  message("")
  message("To reproduce this figure, you need access to respondent coordinates.")
  message("See README.md section 'Restricted Data Access' for instructions.")
  message("")
  message("The pre-generated figure is included in output/pnas/figures/")
} else {

library(rnaturalearth)
library(sf)
library(terra)
library(tigris)
library(ggtext)
library(ggrepel)

# Configuration ----
# Using Albers Equal Area projection for accurate distance calculations across the continental US
crs.proj <- "ESRI:102003"  # North America Albers Equal Area Conic

# Load data ----
# Survey data
g <- readRDS(geo_file)
if (st_crs(g) != crs.proj) {
  g <- st_transform(g, crs = crs.proj)
}

# Population density (from cache directory)
popd <- read_csv(get_popdensity_cache_path(), show_col_types = FALSE)
g <- left_join(g, popd, by = "response_id")

# Get green energy projects
eia <- readRDS(here("data", "inter", "eia_survey_processed.rds"))
eia <- st_as_sf(eia, coords = c("lon", "lat"), crs = 4326)
eia <- st_transform(eia, crs = crs.proj)
eia <- eia %>%
  rename(status_eia = status)

# Get supply chain projects
bgm <- readRDS(here("data", "inter", "bgm_processed.rds"))
bgm <- bgm %>%
  filter(year(date) < 2025 & date >= as.Date("2022-08-16"))
bgm <- bgm %>%
  filter(!is.na(longitude) & !is.na(latitude))
nrow(bgm)
#!operating_status %in% c("Cancelled", "Closed", "Rumored", "Sold") & 
bgm <- st_as_sf(bgm, coords = c("longitude", "latitude"), crs = 4326)
bgm <- st_transform(bgm, crs = crs.proj)

# Flag energy projects that fall outside of shape boundaries
states <- states(cb = TRUE, year = 2021)
states <- st_transform(states, crs = crs.proj)

# Check BGM projects
bgm <- st_join(bgm, states, left = TRUE)
bgm$points_within <- as.integer(!is.na(bgm$NAME))
message(paste0("BGM projects within US (%): ", round(sum(bgm$points_within) / nrow(bgm) * 100, 2)))
bgm <- filter(bgm, points_within == 1)

# Check EIA projects
eia <- st_join(eia, states, left = TRUE)
eia$points_within <- as.integer(!is.na(eia$NAME))
message(paste0("EIA projects within US (%): ", round(sum(eia$points_within) / nrow(eia) * 100, 2)))
# Note: Keep because these are offshore wind turbines

# Create Fig. 1----

states <- states %>%
  filter(!STUSPS %in% c("GU", "HI", "AK", "VI", "MP", "AS", "PR"))

## Panel A---
g.plot <- g %>%
  filter(!state %in% c("Alaska", "Hawaii"))

p.sample <- ggplot() +
  geom_sf(data = states, fill = "white", color = "darkgray", size = 0.3) +
  geom_sf(data = g.plot, aes(geometry = geometry), 
          color = "#666666", size = 0.2, alpha = 0.6) +
  ggplot2::theme_void(base_size = 10) +
  theme(panel.grid = element_blank()) +
  labs(title = paste0("Survey respondents"), tag = "A")


## Panel B----
df.eia <- eia %>%
  filter(STATEFP != "15" & STATEFP != "02" & 
  date > as.Date("2022-08-16") & !technology %in% c("Gas", "Batteries") & 
  points_within == 1) %>%
  dplyr::select(id, technology, NAME, cap) %>%
  distinct() %>%
  mutate(
    technology = case_when(
      technology == "Solar Photovoltaic" ~ "Solar",
      technology == "Onshore Wind Turbine" ~ "Wind"
    )
  )

p.eia <- ggplot() +
  geom_sf(data = states, fill = "white", color = "darkgray", size = 0.3) +
  geom_sf(data = df.eia, aes(geometry = geometry, size = log(cap),
   shape = technology, color = technology), alpha = .9) +
  scale_color_manual(values = c("Solar" = "#E69F00", "Wind" = "#56B4E9")) +
  scale_shape_manual(values = c("Solar" = 17, "Wind" = 15)) +
  scale_size_continuous(name = "Capacity (MW)", range = c(0.1, 1), 
                       trans = "identity", guide = "legend") +
  ggplot2::theme_void(base_size = 10) +
  theme(panel.grid = element_blank(), legend.position = "none") +
  labs(title = paste0("Renewable energy projects"), color = NULL, shape = NULL, tag = "B")

## Panel C----
bgm.24 <- bgm %>%
  filter(!state_province %in% c("AK", "HI"))
p.bgm <- ggplot() +
  geom_sf(data = states, fill = "white", color = "darkgray", size = 0.3) +
  geom_sf(data = bgm.24, aes(geometry = geometry, color = sector, shape = sector), 
  size = 1.5) +
  scale_color_manual(values = c("Batteries" = "#000000", "EVs" = "#009E73", "Solar" = "#E69F00", "Wind" = "#56B4E9")) +
  scale_shape_manual(values = c("Batteries" = 18, "EVs" = 19, "Solar" = 17, "Wind" = 15)) +
  ggplot2::theme_void(base_size = 10) +
  theme(panel.grid = element_blank(), legend.position = "none") +
  labs(title = paste0("Green manufacturing projects"), color = NULL, shape = NULL, tag = "C")


## Panel D----
g.mi <- g %>% filter(state == "Michigan")
bgm.mi <- bgm.24 %>% filter(NAME == "Michigan")
eia.mi <- df.eia %>% filter(NAME == "Michigan")
state.mi <- states %>% filter(NAME == "Michigan")

g.mi$type <- "Survey respondent"
g.mi <- subset(g.mi, select = c(type, geometry))

bgm.mi$type <- bgm.mi$sector
bgm.mi <- subset(bgm.mi, select = c(type, geometry))

eia.mi$type <- eia.mi$technology
eia.mi <- subset(eia.mi, select = c(type, geometry))

df.mi <- rbind(g.mi, bgm.mi, eia.mi)

# Create comprehensive legend plot with all categories
legend_data <- data.frame(
  type = c("Survey respondent", "Solar", "Wind", "Batteries", "EVs"),
  x = 1:5,
  y = 1
)

p.legend <- ggplot(legend_data, aes(x = x, y = y, color = type, shape = type)) +
  geom_point(size = 3) +
  scale_color_manual(values = c("Survey respondent" = "#666666", 
    "Batteries" = "#000000",
    "EVs" = "#009E73",
    "Solar" = "#E69F00",
    "Wind" = "#56B4E9")) +
  scale_shape_manual(values = c("Survey respondent" = 16, 
    "Batteries" = 18,
    "EVs" = 19,
    "Solar" = 17,
    "Wind" = 15)) +
  ggplot2::theme_void() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.box = "horizontal"
  ) +
  guides(color = guide_legend(override.aes = list(size = 3)))

# Extract legend
# Use null device to prevent Rplots.pdf creation in non-interactive sessions
library(ggpubr)
pdf(NULL)
common_legend <- get_legend(p.legend)
invisible(dev.off())

p.mi <- ggplot() +
  geom_sf(data = state.mi, fill = "white", color = "darkgray", size = 0.3) +
  geom_sf(data = df.mi, aes(geometry = geometry, color = type, shape = type), 
  size = 1.5) +
  scale_color_manual(values = c("Survey respondent" = "#666666", 
    "Batteries" = "#000000",
    "EVs" = "#009E73",
    "Solar" = "#E69F00",
    "Wind" = "#56B4E9")) +
  scale_shape_manual(values = c("Survey respondent" = 16, 
    "Batteries" = 18,
    "EVs" = 19,
    "Solar" = 17,
    "Wind" = 15)) +
  ggplot2::theme_void(base_size = 10) +
  theme(panel.grid = element_blank(), legend.position = "none") +
  labs(title = "Zoomed-in example", color = NULL, shape = NULL, tag = "D")

library(patchwork)
# First combine the 4 main plots (already tagged individually)
p.main <- p.sample + p.eia + p.bgm + p.mi + 
  plot_layout(ncol = 2)

# Then add the legend without additional tagging
p.fig1 <- p.main / common_legend +
  plot_layout(heights = c(10, 1))
  
save_pnas_pdf(
  p.fig1,
  here("output", "pnas", "figures", "fig_1_geographic_vis.pdf"),
  width = "double",
  height_cm = 13
)

} # End of coordinate-requiring block